# ======================
# Section 0: setup
# =====================
# Start from a clean workspace, but keep `dir`: this script reads its input
# from paste0(dir, ...) and writes its CSV output under it, yet never defines
# `dir` itself, so wiping it here would make the fread() call below fail.
rm(list = setdiff(ls(), "dir"))
library(data.table)
library(dplyr)
library(dtplyr)
library(ggplot2)
library(Hmisc)
library(stringr)
library(stargazer)
library(locfit)
library(RColorBrewer)
library(grid)
library(gtable)
library(sandwich)
library(plm)
library(lmtest)
library(parallel)
library(multiwayvcov)
library(lfe)
library(haven)
library(R.utils)
library(broom)

# Glue the given string fragments together (no separator) and parse the result
# into an unevaluated R expression, ready to be passed to eval().
comb <- function(...) {
  code_text <- paste0(...)
  parse(text = code_text)
}
# grep() that returns the matching elements themselves rather than their
# indices. All arguments are forwarded to grep().
# Uses TRUE rather than T: T is an ordinary variable that can be reassigned,
# which would silently switch this back to returning indices.
grepv <- function(...) grep(..., value = TRUE)
# Return the column names of `dataset` that match the regular expression
# `string`.
#
# string:  a character pattern, passed to grep() as the regular expression.
# dataset: a data.frame (or subclass, e.g. data.table) whose names are searched.
#
# Stops with an error if `dataset` is not a data.frame or `string` is not a
# character vector.
grepn <- function(string, dataset) {
  if (!inherits(dataset, "data.frame")) stop("Error: second argument must be a dataframe")
  # is.character() instead of class(string) != 'character': class() can have
  # length > 1, which makes the comparison an invalid `if` condition.
  if (!is.character(string)) stop("Error: first argument is a character")
  grep(string, names(dataset), value = TRUE)
}
 
# Build the text of a filter expression requiring every variable named in `x`
# to be non-missing, e.g. c("a", "b") -> "!is.na( a ) & !is.na( b )".
#
# x: character vector of variable names.
#
# Returns a single string. For empty `x` it returns "TRUE" (a filter that
# keeps every row); the previous loop over 1:length(x) produced
# "!is.na( NA )" in that case, which evaluates to FALSE and drops every row.
vecToNA <- function(x) {
  if (length(x) == 0) {
    return("TRUE")
  }
  paste(paste("!is.na(", x, ")"), collapse = " & ")
}

# NOTE(review): judge_n_cutoff is not referenced anywhere else in the visible
# script (the split-sample loop uses judge_n_cut instead) -- confirm it is
# still needed.
judge_n_cutoff <- 100

# `dir` is never defined in this script. If the caller has not set it, the
# name resolves to the base function dir() and paste0() fails with an obscure
# coercion error, so fail early with an explicit message instead.
# The path is concatenated directly, so `dir` must end with a path separator.
if (is.function(dir)) {
  stop("`dir` is not set: define the project root path (ending with a path ",
       "separator) before running this script", call. = FALSE)
}

# "." and "NA" in the CSV are both read as missing values.
data <- fread(paste0(dir, "build/Data_ready_with_instruments.csv"),
              na.strings = c(".", "NA"))

# Judge-by-year identifiers (used further down as clustering variables).
data[, judgeyear := paste0(judge, year)]
data[, judgemainyear := paste0(first_judge_main, year)]
# Number of analysis-sample rows per landlord; rows outside the analysis
# sample keep landlord_n = NA.
data[analysis_sample == TRUE, landlord_n := .N, by = Landlord]
data[, district := factor(district)]


#FUNCTION TO OUTPUT ONLY TABLES USING STARGAZER

#MAKE SURE TO ROUND BEFORE USING THIS FUNCTION

#TO FORCE CERTAIN COLUMNS TO DISPLAY A FIXED NUMBER OF DECIMAL DIGITS: set digs to the desired number of digits (it should equal the rounding done beforehand),
#set col_nums to the column numbers of the TABLE columns that should show that many digits, and set tot_cols to
#the number of columns in the table. Note that a table column number is not always the column number of the dataframe being converted, because the 1st table column
#may or may not hold names that come from another source. If this explanation is unclear, ask the author; the function is admittedly messy.

# Run stargazer(...) and print only the tabular environment of its output,
# optionally padding numeric cells with trailing zeros.
#
# ...:       arguments forwarded unchanged to stargazer().
# notabular: if TRUE, also drop the \begin{tabular} and \end{tabular} lines and
#            print only the rows in between.
# digs:      number of decimal digits the selected cells should display.
# col_nums:  table column numbers (1 = leftmost cell) whose cells are padded.
# tot_cols:  total number of columns in the table; only output lines containing
#            exactly tot_cols - 1 "&" separators are treated as table rows.
#
# Padding is applied only when digs, col_nums and tot_cols are all supplied.
# Only cells of the form $<digits , .>$ are padded; any other cell is left
# untouched.
# NOTE(review): negative numbers appear to be rendered by stargazer as
# `$-$0.5`, which does not match this form and is therefore not padded --
# confirm whether that matters for the tables produced here.
#
# The result is written with cat(); nothing useful is returned.
stargazer_tabular_only <- function(..., notabular = FALSE, digs = NULL,
                                   col_nums = NULL, tot_cols = NULL) {
  out <- capture.output(stargazer(...))

  # restrict to, and including, \begin{tabular} and \end{tabular} lines
  begin <- grep("\\begin{tabular}", out, fixed = TRUE)
  end <- grep("\\end{tabular}", out, fixed = TRUE)

  if (length(begin) == 1 && length(end) == 1) {
    if (notabular) {
      # seq_len() keeps this empty (instead of counting backwards) when there
      # is nothing between the two delimiter lines
      out <- out[begin + seq_len(max(end - begin - 1, 0))]
    } else {
      out <- out[begin:end]
    }
  } else {
    # warning() rather than print(): printed text would be captured by
    # get_tex_tabular() and written into the .tex file
    warning("stargazer_tabular_only: expected exactly one \\begin{tabular} and ",
            "one \\end{tabular} line in the stargazer output (found ",
            length(begin), " and ", length(end), "); keeping the full output",
            call. = FALSE)
  }

  if (!is.null(col_nums) && !is.null(tot_cols) && !is.null(digs)) {
    n_sep <- lengths(regmatches(out, gregexpr("&", out, fixed = TRUE)))
    is_row <- n_sep == tot_cols - 1
    rows <- out[is_row]

    for (colnumba in col_nums) {
      # Group 1: the colnumba - 1 preceding cells plus everything in the target
      # cell up to its opening "$"; group 2: the number; group 3: closing "$".
      # [^&] keeps the match inside the target cell, so a neighbouring column
      # holding the same value is never modified by mistake.
      cell_re <- paste0("^((?:[^&]*&){", colnumba - 1,
                        "}[^&]*?\\$)([0-9.,]+)(\\$)")

      for (rowout in seq_along(rows)) {
        hit <- regmatches(rows[rowout],
                          regexec(cell_re, rows[rowout], perl = TRUE))[[1]]
        if (length(hit) == 0) next  # no numeric cell in this column

        decim <- hit[3]
        dot_pos <- as.integer(regexpr(".", decim, fixed = TRUE))
        after_deci <- if (dot_pos > 0) nchar(decim) - dot_pos else 0
        diff_deci <- digs - after_deci
        if (diff_deci <= 0) next  # already shows enough digits

        padded <- paste0(decim, if (after_deci == 0) "." else "",
                         strrep("0", diff_deci))
        # the pattern is anchored at the row start, so hit[1] is a prefix of
        # the row and the remainder can be re-attached unchanged
        rows[rowout] <- paste0(hit[2], padded, hit[4],
                               substring(rows[rowout], nchar(hit[1]) + 1))
      }
    }
    out[is_row] <- rows
  }
  cat(paste(out, collapse = "\n"), "\n")
}

# Write the tabular-only stargazer output to `file_name`.
#
# file_name: path of the file to write.
# ...:       passed on unchanged to stargazer_tabular_only().
get_tex_tabular <- function(file_name, ...) {
  tabular_lines <- capture.output(stargazer_tabular_only(...))
  writeLines(tabular_lines, file_name)
}



# ========== BUILDING TYPE INDICATORS =======

# Group indicators: a race/ethnicity flag is TRUE when the corresponding pred.*
# value is at least 0.75; female flags gender == "female".
data[, `:=`(
  black  = pred.bla >= .75,
  hisp   = pred.his >= .75,
  white  = pred.whi >= .75,
  female = gender == "female"
)]

# NOTE(review): the row filter tests the existing `joint` column against the
# case-type labels while the assigned value is computed from `case_type`;
# confirm the filter is not meant to be on `case_type`.
data[joint %in% c("JOINT ACTION", "FORCIBLE ENTRY AND DETAINER"),
     joint := case_type == "JOINT ACTION"]

# landlord_n is only filled for analysis-sample rows, so small_landlord is NA
# for all other rows.
data[, small_landlord := landlord_n <= 5]

# ========== LOOPING TO DO CROSS MONO CHECKS =======

# Keep only the rows that satisfy the conditions needed throughout the rest of
# the script.
data <- data[
  valid_courtroom == TRUE &
    !is.na(evicted) &
    !is.na(year) &
    !is.na(judge) &
    gender != "either"
]

# Minimum number of cases per judge-year cell required in the loop below.
judge_n_cut <- 50

# Overall court samples: each entry is c(<row filter as text>, <short label>).
courtsamples <- list(c("iv_sample == T", "ivsamp"))

# Each entry is c(<value 1>, <value 2>, <grouping variable>). The values are
# strings because c() coerces logicals to character as soon as a string is
# present; the loop compares them against the group column with ==.
group_pairs <- list(c("TRUE", "FALSE", "female"),
                    c("TRUE", "FALSE", "black"),
                    c("TRUE", "FALSE", "joint"),
                    c("TRUE", "FALSE", "small_landlord"))

# First-stage formula. The term for the current grouping variable is removed
# from it inside the loop when it appears as " <name> +".
base_form <- "evicted ~ stringency_cross + joint + ad_damnum + I(median_rent/1000) + pct_black + pct_hisp + female + pred.bla + pred.whi + pred.his + pred.asi + no_attorney + factor(year) * district"

# Accumulates one result row per (court sample, group, direction).
dt_out <- data.table()

# NOTE(review): require() only warns and returns FALSE when a package is
# missing. The loop itself needs tidy() from broom.
require(tidyverse)
require(broom)
### MAIN LOOP
# For every court sample and grouping variable, build judge-year stringency
# from one group and estimate the first stage on the opposite group, in both
# directions.
for (courtsamp in courtsamples) {

  samp <- courtsamp[1]
  samp_name <- courtsamp[2]

  # apply the court-sample restriction
  temp_data <- data[eval(comb(samp))]

  for (grp_pair in group_pairs) {

    grp1 <- grp_pair[1]
    print(grp1)

    grp2 <- grp_pair[2]
    grp <- grp_pair[3]

    for (bool in c(TRUE, FALSE)) {

      # g1: group the stringency measure is computed from
      # g2: group the first stage is estimated on
      if (bool) {
        g1 <- grp1
        g2 <- grp2
      } else {
        g1 <- grp2
        g2 <- grp1
      }

      # judge-year eviction rate among group g1, one row per case number
      inst_cases <- unique(temp_data[get(grp) == g1], by = "case_number")
      judge_stringency <- inst_cases[
        ,
        .(stringency_cross = mean(evicted, na.rm = TRUE), judge_n_ppl = .N),
        by = c("judge", "year")
      ]

      # impose the minimum number of cases per judge-year
      judge_stringency <- judge_stringency[judge_n_ppl >= judge_n_cut, ]
      judge_stringency <- unique(
        judge_stringency[, .(judge, year, stringency_cross)]
      )

      # inner join: keep only judge-years that have a stringency value
      est_data <- merge(temp_data, judge_stringency,
                        by = c("judge", "year"),
                        all.x = FALSE, all.y = FALSE)

      # keep the opposite group, now carrying stringency_cross
      est_data <- est_data[get(grp) == g2]

      # impose the same minimum on the estimation group
      est_data[, judge_n_ppl := .N, by = c("judge", "year")]
      est_data <- est_data[judge_n_ppl >= judge_n_cut, ]

      # drop the grouping variable from the regression formula
      drop_pattern <- paste0(" ", grp, " \\+")
      form <- gsub(drop_pattern, "", base_form)

      # first stage
      reg <- lm(as.formula(form), data = est_data)

      # coefficient and standard error on the cross-group stringency measure
      coef_row <- setDT(tidy(reg))
      coef_row <- coef_row[term == "stringency_cross", ]

      w1 <- if (g1 == TRUE) "" else "not"
      w2 <- if (g2 == TRUE) "" else "not"

      dt_temp <- data.table("courtsample" = samp_name,
                            "estsample" = paste0(w2, grp),
                            "instsample" = paste0(w1, grp),
                            "beta" = coef_row$estimate,
                            "se" = coef_row$std.error,
                            "N" = nobs(reg))

      dt_out <- rbindlist(list(dt_out, dt_temp))
    }
  }
}


### SAVE DT_OUT
fwrite(dt_out, paste0(dir, "Inputs/Cook_split_sample_mono_checks.csv"))



# ==========================================================================================
# Making First Stage Robustness Table similar to Magne's Family Welfare Cultures' Table IV.
# =========================================================================================

# # ---------------------- Regressions -----------------------------------------
# # baseline
# First-stage regressions of `evicted` on alternative stringency measures.
# In every model the stringency measure is the first right-hand-side term, so
# it is the second coefficient (after the intercept).

# baseline: no case controls
r1 <- lm(
  evicted ~ Stringency + factor(year) * district,
  data = data[iv_sample == TRUE & gender != "either"]
)

# main specification
r2 <- lm(
  evicted ~ Stringency +
    case_type + ad_damnum + I(median_rent/1000) + pct_black + pct_hisp +
    gender + pred.bla + pred.whi + pred.his + pred.asi + no_attorney +
    factor(year) * district,
  data = data[iv_sample == TRUE & gender != "either"]
)

# main specification plus the other stringency measures
r32 <- lm(
  evicted ~ Stringency + Stringency_conti + Stringency_amount + Stringency_stays +
    case_type + ad_damnum + I(median_rent/1000) + pct_black + pct_hisp +
    gender + pred.bla + pred.whi + pred.his + pred.asi + no_attorney +
    factor(year) * district,
  data = data[iv_sample == TRUE & gender != "either"]
)

r4 <- lm(
  evicted ~ Stringency_first_judge_main +
    case_type + ad_damnum + I(median_rent/1000) + pct_black + pct_hisp +
    gender + pred.bla + pred.whi + pred.his + pred.asi + no_attorney +
    factor(year) * district,
  data = data[iv_sample == TRUE & gender != "either"]
)

# no iv_sample / analysis_sample restriction
r5 <- lm(
  evicted ~ Stringency_allcases +
    case_type + ad_damnum + I(median_rent/1000) + pct_black + pct_hisp +
    gender + pred.bla + pred.whi + pred.his + pred.asi + no_attorney +
    factor(year) * district,
  data = data[gender != "either"]
)

r7 <- lm(
  evicted ~ Stringency_excluding_never_served +
    case_type + ad_damnum + I(median_rent/1000) + pct_black + pct_hisp +
    gender + pred.bla + pred.whi + pred.his + pred.asi + no_attorney +
    factor(year) * district,
  data = data[gender != "either"]
)

# analysis sample
r6 <- lm(
  evicted ~ Stringency_n10 +
    case_type + ad_damnum + I(median_rent/1000) + pct_black + pct_hisp +
    gender + pred.bla + pred.whi + pred.his + pred.asi + no_attorney +
    factor(year) * district,
  data = data[analysis_sample == TRUE & gender != "either"]
)

r8 <- lm(
  evicted ~ Stringency_noNcutoff +
    case_type + ad_damnum + I(median_rent/1000) + pct_black + pct_hisp +
    gender + pred.bla + pred.whi + pred.his + pred.asi + no_attorney +
    factor(year) * district,
  data = data[analysis_sample == TRUE & gender != "either"]
)

# cross case-type checks: case_type is not a control because each sample
# contains a single case type
r10 <- lm(
  evicted ~ Stringency_single_action_only +
    ad_damnum + I(median_rent/1000) + pct_black + pct_hisp +
    gender + pred.bla + pred.whi + pred.his + pred.asi + no_attorney +
    factor(year) * district,
  data = data[iv_sample == TRUE & gender != "either" & case_type == "JOINT ACTION"]
)

r11 <- lm(
  evicted ~ Stringency_joint_action_only +
    ad_damnum + I(median_rent/1000) + pct_black + pct_hisp +
    gender + pred.bla + pred.whi + pred.his + pred.asi + no_attorney +
    factor(year) * district,
  data = data[iv_sample == TRUE & gender != "either" & case_type == "FORCIBLE ENTRY AND DETAINER"]
)


# Models shown in the robustness table; the labels in `sample` follow the same
# order. r1 (the baseline without controls) is not part of the table.
regs <- list(r2, r32, r4, r5, r6, r7, r8, r10, r11)
sample <- c("Main", "Controlling for other judge chars.",
            "Alternate first judge construction", "All cases", "Including judges > 10 cases",
            "Excluding cases never served", "Including judges < 10 cases", "Single-Action stringency on Joint-Action cases",
            "Joint-Action stringency on Single-Action cases")

# Every model is clustered on judgeyear, except the alternate first-judge
# specification (r4, third entry of `regs`), which is clustered on
# judgemainyear.
cluster_by <- rep(list(~ judgeyear), length(regs))
cluster_by[[3]] <- ~ judgemainyear

# Compute each clustered coefficient test once and read both the standard
# error and the p-value from it; previously every clustered covariance matrix
# was estimated twice.
clustered <- Map(
  function(model, cl) coeftest(model, vcov = cluster.vcov(model, cl)),
  regs, cluster_by
)

# Row 2 of each coefficient table is the stringency measure (first regressor
# after the intercept); column 2 is the standard error, column 4 the p-value.
coefs <- vapply(regs, function(x) coef(x)[[2]], numeric(1))
ses <- vapply(clustered, function(ct) ct[2, 2], numeric(1))
# p-values are rounded only once (to 3 digits, below); rounding to 4 digits
# first and then to 3 can shift the last displayed digit.
pvals <- vapply(clustered, function(ct) ct[2, 4], numeric(1))
nobs <- vapply(regs, function(x) length(residuals(x)), integer(1))

first_stage_check <- data.table("Sample" = sample,
                                "Coefficient" = coefs,
                                "Standard Errors" = ses,
                                "P-Value" = pvals,
                                "Observations" = nobs)
first_stage_check[, 2:5] <- round(first_stage_check[, -1], 3)

# NOTE(review): this path is relative to the working directory, whereas the
# CSV output above is written under `dir` -- confirm this is intended.
get_tex_tabular(file_name = "Inputs/Cook_stringency_specification_checks.tex",
                first_stage_check, summary = F, type = "latex", rownames = F, digits = 3,
                col_nums = c(2, 3, 4), tot_cols = 5, digs = 3)


